import pandas as pd
import requests
from lxml import etree

# Page to scrape, plus a browser-like User-Agent header sent with the request.
url = 'https://movie.douban.com/top250'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}
# Without a timeout requests waits indefinitely on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)

if response.status_code != 200:
    print('请求失败')
    # Stop instead of going on to parse the body of a failed response.
    raise SystemExit(1)

# Force UTF-8 decoding of the body before response.text is read.
response.encoding = 'utf-8'

def _first_or_blank(matches):
    """Return the first XPath match, or an empty string when nothing matched."""
    return matches[0] if matches else ''


# Parse the downloaded HTML and select the <li> elements of the result list.
tree = etree.HTML(response.text)
items = tree.xpath("/html/body/div[3]/div[1]/div/div[1]/ol/li")

# Each row is [movie name, score, viewer-count text].
data = []
for item in items:
    # A field missing from one entry becomes '' rather than raising IndexError
    # and aborting the whole run.
    movie_name = _first_or_blank(
        item.xpath('.//div/div[2]/div[1]/a/span[@class="title"]/text()')
    )
    score = _first_or_blank(
        item.xpath('.//div[@class="info"]//span[@class="rating_num"]/text()')
    )
    number_of_viewers = _first_or_blank(
        item.xpath('.//div[@class="info"]//span[contains(text(), "人评价")]/text()')
    )
    print("电影名称:", movie_name)
    print("评分:", score)
    print("观看人数:", number_of_viewers)
    data.append([movie_name, score, number_of_viewers])

# Tabulate the collected rows under named columns and write them to an
# Excel file, leaving out the DataFrame index.
column_names = ['电影名称', '评分', '观看人数']
df = pd.DataFrame(data=data, columns=column_names)
df.to_excel('movies.xlsx', index=False)
